# -*- coding: utf-8 -*-
"""
@author: Chenglong Chen <c.chenglong@gmail.com>
@brief: generate stacking feature conf for 2nd and 3rd level models

"""

import os
import re
from optparse import OptionParser

import pandas as pd

import config
from utils import time_utils


def grab(pattern, text):
    """Return every non-overlapping match of ``pattern`` found in ``text``.

    The result has the shape produced by ``findall``: a list of strings when
    the pattern has zero or one capturing group, and a list of tuples when it
    has several groups.
    """
    return re.compile(pattern).findall(text)


def check_valid(model):
    """Tell whether ``model`` has a complete test prediction file.

    The file checked is ``<config.OUTPUT_DIR>/All/test.pred.<model>.csv``.

    Returns:
        True when the file can be read by ``pd.read_csv`` and its number of
        rows equals ``config.TEST_SIZE``; False when the row count differs or
        the file cannot be read at all.
    """
    pred_path = "%s/All/test.pred.%s.csv" % (config.OUTPUT_DIR, model)
    try:
        df = pd.read_csv(pred_path)
    except Exception:
        # Best-effort check: a missing, empty or unparsable file just means
        # the model is not usable. ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt / SystemExit propagate.
        return False
    return bool(df.shape[0] == config.TEST_SIZE)


def get_model_list(log_folder, topN):
    """Collect the best-scoring valid models from every log file in a folder.

    Each regular file directly inside ``log_folder`` is read (in sorted name
    order) and scanned for task lines (starting at ``[Feat@``), ``Mean: ...``
    lines and ``Std: ...`` lines. Tasks are paired positionally with the
    parsed means, ranked by ascending mean, and the first ``topN`` tasks that
    pass ``check_valid`` are kept.

    Args:
        log_folder: directory containing the log files.
        topN: maximum number of models to keep *per log file*. A value of
            zero or less selects nothing.

    Returns:
        List of task strings, grouped by log file and ordered by ascending
        mean within each file.

    Raises:
        ValueError: if a captured ``Mean`` / ``Std`` value is not a float.
        OSError: if the folder or one of its files cannot be read.
    """
    tasks_ens = []
    for fname in sorted(os.listdir(log_folder)):
        path = os.path.join(log_folder, fname)
        if not os.path.isfile(path):
            continue
        # Use a context manager so the handle is closed deterministically.
        with open(path, "r") as f:
            text = f.read()

        # grab everything we need (raw strings: the pattern contains a
        # backslash escape that is invalid in a normal string literal)
        tasks = grab(r"(\[Feat@.*)", text)
        rmse_mean = [float(x) for x in grab(r"Mean: (.*)", text)]
        rmse_std = [float(x) for x in grab(r"Std: (.*)", text)]
        # Only keep entries for which a task, a mean and a std were all
        # logged; a trailing incomplete record is dropped.
        L = min(len(tasks), len(rmse_mean), len(rmse_std))
        # A task that appears several times keeps its last logged mean.
        d = dict(zip(tasks[:L], rmse_mean[:L]))

        # keep the top-N (lowest mean first)
        ranked = sorted(d.items(), key=lambda kv: kv[1])
        cnt = 0
        for task, score in ranked:
            # Test the limit before validating so that topN <= 0 selects
            # nothing and no prediction file is read needlessly.
            if cnt >= topN:
                break
            if check_valid(task):
                tasks_ens.append(task)
                print("Read %s : %.6f" % (task, score))
                cnt += 1
        if cnt > 0:
            print("Read %d models from %s" % (cnt, fname))

    return tasks_ens


# Template for the head of a generated feature conf file.
# Its four %-placeholders are filled by _create_feature_conf, in order, with:
# the path of this script, the stacking level (-l), the top-N value (-t) and
# the output file name (-o).
# The template ends right after opening the ``feature_list`` literal, so the
# code using it must append the entries and the closing bracket.
header_pattern = """
# -*- coding: utf-8 -*-
\"\"\"
@author: Chenglong Chen <c.chenglong@gmail.com>
@brief: one stacking feature conf

Generated by
python %s -l %s -t %d -o %s

\"\"\"

feature_list = [

"""


def _create_feature_conf(level, topN, outfile):
    """Write a feature conf listing the selected models of one level.

    Models are taken from ``<config.LOG_DIR>/level<level>_models`` through
    ``get_model_list``. The generated text is ``header_pattern`` followed by
    one quoted entry per model and a closing bracket, and it is written to
    ``outfile`` inside ``config.FEAT_CONF_DIR``.
    """
    models_dir = "%s/level%d_models" % (config.LOG_DIR, level)
    selected = get_model_list(models_dir, topN)
    header = header_pattern % (__file__, level, int(topN), outfile)
    # one '"<model>",' line per selected model
    entries = "".join('"%s",\n' % name for name in selected)
    content = header + entries + "]\n"
    target = os.path.join(config.FEAT_CONF_DIR, outfile)
    with open(target, "w") as f:
        f.write(content)


def main(options):
    """Generate the feature conf described by the parsed ``options``.

    Reads the ``level``, ``topN`` and ``outfile`` attributes of ``options``
    and forwards them to ``_create_feature_conf``.
    """
    level = options.level
    top_n = options.topN
    outfile = options.outfile
    _create_feature_conf(level=level, topN=top_n, outfile=outfile)


def parse_args(parser):
    """Register this script's options on ``parser`` and parse the arguments.

    Options:
        -l / --level    int, default 2, stored as ``level``
        -t / --top      int, default 10, stored as ``topN``
        -o / --outfile  string, stored as ``outfile``; the default name
                        embeds the value of ``time_utils._timestamp()``

    Returns:
        The ``(options, args)`` pair produced by ``parser.parse_args()``.
    """
    parser.add_option(
        "-l", "--level",
        dest="level", type="int", default=2, help="level",
    )
    parser.add_option(
        "-t", "--top",
        dest="topN", type="int", default=10, help="top-N",
    )
    default_outfile = "stacking_feature_conf_%s.py" % time_utils._timestamp()
    parser.add_option(
        "-o", "--outfile",
        dest="outfile", type="string", default=default_outfile, help="outfile",
    )
    return parser.parse_args()


# Script entry point: parse the command-line options, then generate the conf.
if __name__ == "__main__":
    parser = OptionParser()
    # ``args`` (positional arguments) is parsed but not used below.
    options, args = parse_args(parser)
    main(options)
